# coding=utf-8


from pyspark.sql import SparkSession

# **********Begin**********#

# Create (or reuse) a local SparkSession for this job.
# NOTE(review): spark.sql.crossJoin.enabled is presumably set so that the
# scalar subquery in the statement below is accepted on older Spark
# versions -- confirm before removing it.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL basic example")
    .config("spark.sql.crossJoin.enabled", "true")
    .master("local")
    .getOrCreate()
)

try:
    # Read the data from /root/jun.json and collapse it to one partition.
    df = spark.read.json('/root/jun.json').coalesce(1)

    # Register the DataFrame as a temporary view so it can be queried by SQL.
    df.createOrReplaceTempView('table1')

    # Compute, for each R&D unit (`研发单位`), its share of all rows that
    # have both a non-null R&D unit and a non-null name (`名称`). The share
    # is a percentage rounded to two decimals with a trailing '%'.
    # The adjacent literals concatenate into a single SQL statement.
    # NOTE(review): LIMIT 10 is applied without ORDER BY, so the statement
    # itself does not pin down which ten groups are returned.
    sqlDF = spark.sql(
        "select concat(round((count(`研发单位`)*100/"
        "(select count(`研发单位`) from table1"
        " where `研发单位` is not null and `名称` is not null)), 2), '%'),"
        "`研发单位` from table1"
        " where `研发单位` is not null and `名称` is not null"
        " group by `研发单位` limit 10"
    )

    # Save the result as CSV, replacing any previous output at this path.
    sqlDF.write.mode('overwrite').format('csv').save('/root/airspark')
    # **********End**********#
finally:
    # Always stop the session, even when reading, querying or writing
    # fails, so the Spark context is not left running.
    spark.stop()




